#install.packages(c("GGally", "naniar", "plotly")) 
library(tidyverse)
library(GGally)
library(naniar)
library(plotly)
library(corrplot)
# Load the 2012 heptathlon results, keep the athlete name plus the seven
# event scores, add each athlete's summed score as `total`, and sort the
# table from highest to lowest total.
hep <- read_csv("Hep2012.csv") %>%
  dplyr::select(
    Athlete,
    `100m Hurdles`,
    `High Jump`,
    `Shot Put`,
    `200m`,
    `Long Jump`,
    Javelin,
    `800m`
  ) %>%
  mutate(total = rowSums(across(`100m Hurdles`:`800m`))) %>%
  arrange(desc(total))

# Question 1.

 # Pairwise correlations between the seven event scores (columns 2-8 of
# `hep`), using every pair of observations that is complete for the two
# events being compared.
#
# The matrix is computed with a single call to cor(): wrapping the result in
# a second cor() would correlate the columns of the correlation matrix with
# each other, which is not the event-to-event correlation being plotted.
#
# NOTE(review): the result is still stored under the name `cor`, as before.
# Calls such as cor(x) keep working because R skips non-function bindings
# when looking up a function, but a distinct name would be clearer.
cor <- cor(hep[, 2:8], use = "pairwise.complete.obs")
corrplot(cor)

# High Jump and Long Jump have a high positive correlation, while Javelin
# and 200m have a high negative correlation.

# Question 2.

library(tidyverse)

# Scatterplot matrix of the seven event scores (columns 2-8 of `hep`).
# The athlete name is mapped to the `text` aesthetic so that it can be shown
# in the plotly tooltip alongside the x and y values.
#
# `columnLabels` is spelled out correctly here; a misspelt argument name is
# swallowed by `...` and the labels are never applied. The mapping is passed
# by name so it cannot be matched to the wrong positional parameter.
plot <- ggpairs(
  hep,
  mapping = aes(text = Athlete),
  columns = 2:8,
  columnLabels = c(
    "100m Hurdles", "High Jump", "Shot Put", "200m",
    "Long Jump", "Javelin", "800m"
  )
)

# Convert to an interactive plotly widget; highlighting is triggered on hover
# and cleared on deselect.
ggplotly(plot, tooltip = c("text", "x", "y")) %>%
  highlight(on = "plotly_hover", off = "plotly_deselect")

# Shot Put vs. 200m for SKUJYTE Austra is an outlier, and so is 200m vs. 800m
# for OESER Jennifer.
# Jennifer did not run her 800m, which makes her point an outlier; Austra
# likewise has a zero for her 200m, which skews the data.

# Question 3.

library(MASS)

# Parallel coordinate plot of the seven event scores (columns 2-8 of `hep`).
#
# MASS::parcoord() has no `scale` argument: extra arguments are forwarded as
# graphical parameters, so passing scale = "globalMinMax" only produced
# '"scale" is not a graphical parameter' warnings and had no effect on the
# plot. parcoord() always rescales each column to its own min-max range.
# NOTE(review): if a common (global) scale across events was intended, use
# GGally::ggparcoord(..., scale = "globalminmax") instead -- confirm.
parcoord(hep[, 2:8])

# Yes, you can see many athletes with a zero score in multiple events,
# potentially due to injury or other incidents.

# Parallel coordinate plot of the event scores with the three medallists
# highlighted and every other athlete drawn in grey.

# Colour for each level of the `Winners` grouping column. The names must match
# the values stored in `Winners` exactly, including the catch-all "Other".
cols <- c(
  "ENNIS Jessica" = "red",
  "SCHWARZKOPF Lilli" = "blue",
  "CHERNOVA Tatyana" = "orange",
  "Other" = "grey"
)
medallists <- setdiff(names(cols), "Other")

# Label each row with the athlete's own name if she is a medallist, otherwise
# "Other". Membership is tested with %in%: comparing against the name vector
# with `!=` is element-wise with recycling, so it only gives the right answer
# when the medallists happen to occupy rows 1-3 in exactly that order. This
# also removes the hard-coded assumption that the table has 38 rows.
hep2 <- hep %>%
  mutate(Winners = if_else(Athlete %in% medallists, Athlete, "Other"))

ggparcoord(
  hep2,
  columns = 2:8,
  groupColumn = "Winners",
  scale = "globalminmax"
) +
  scale_color_manual(values = cols)

# Cumulative standings: after each event, rank the athletes by the sum of
# their scores so far and draw the rank trajectories as parallel coordinates.
hepc <- hep

# Column i of `hepc` becomes the rank of the running total over event
# columns 2..i of the original scores (missing scores are ignored in the sum).
# rank() gives 1 to the smallest value, so the athlete leading after an event
# has the largest rank and appears at the top of the plot.
for (i in 2:8) {
  hepc[[i]] <- rank(rowSums(hep[, 2:i], na.rm = TRUE))
}

# Group the lines by medallist vs. everyone else so that the colour scale
# below covers every level. Grouping by `Athlete` directly leaves all
# non-medallists without a matching colour, because "Other" is never one of
# the athlete names.
winner_cols <- c(
  "ENNIS Jessica" = "red",
  "SCHWARZKOPF Lilli" = "blue",
  "CHERNOVA Tatyana" = "orange",
  "Other" = "grey"
)
hepc$Winners <- ifelse(
  hepc$Athlete %in% setdiff(names(winner_cols), "Other"),
  hepc$Athlete,
  "Other"
)

ggparcoord(
  hepc,
  columns = 2:8,
  groupColumn = "Winners",
  scale = "globalminmax"
) +
  scale_color_manual(values = winner_cols)

# No. You can clearly see from this graph that none of them led after every
# event. Jessica is 2nd after only one event and 1st after all the rest; the
# 2nd- and 3rd-place finishers vary in position from event to event and did
# not lead after any event. In conclusion, Jessica was only one event short
# of leading after every single event in the heptathlon.